# Load libraries for...
# (library() rather than require(): require() only returns FALSE when a
# package is missing, so a missing dependency would surface much later;
# library() fails immediately and loudly.)
# plotting
library(gplots)
library(ggplot2)
library(ggthemes)
library(scales)
library(grid)
# modelling
library(car)
library(fitdistrplus)
library(lme4)
library(nlme)
# data wrangling
library(gdata)
library(reshape2)
library(data.table)
library(dplyr)
library(tidyr)
library(purrr)
sessionInfo() # for reproducibility
## R version 3.2.0 (2015-04-16)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Fedora 20 (Heisenbug)
##
## locale:
## [1] LC_CTYPE=en_CA.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_CA.UTF-8 LC_COLLATE=en_CA.UTF-8
## [5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_CA.UTF-8
## [7] LC_PAPER=en_CA.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] purrr_0.2.1 tidyr_0.4.1 dplyr_0.4.3
## [4] data.table_1.9.6 reshape2_1.2.2 gdata_2.17.0
## [7] nlme_3.1-128 lme4_1.1-12 Matrix_1.2-0
## [10] fitdistrplus_1.0-6 car_2.0-16 nnet_7.3-9
## [13] MASS_7.3-40 scales_0.4.0 ggthemes_3.0.3
## [16] ggplot2_2.1.0 gplots_3.0.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.3 formatR_1.3 nloptr_1.0.4
## [4] plyr_1.8 bitops_1.0-6 tools_3.2.0
## [7] digest_0.6.4 evaluate_0.8.3 gtable_0.1.2
## [10] lattice_0.20-31 DBI_0.3.1 parallel_3.2.0
## [13] yaml_2.1.13 stringr_0.6.2 knitr_1.12.3
## [16] gtools_3.5.0 caTools_1.17.1 R6_2.1.2
## [19] survival_2.38-1 rmarkdown_0.9.5 minqa_1.2.4
## [22] magrittr_1.5 htmltools_0.3.5 splines_3.2.0
## [25] assertthat_0.1 colorspace_1.2-4 KernSmooth_2.23-14
## [28] munsell_0.4.2 chron_2.3-47
original_par <- par() #for resetting to original par after generating the plot in the null device
Import a variety of experiments recorded with non-standard annotation. They were all performed according to the protocol in the Methods, but the conditions were frequently changed.
Import the data from CSV files.
# Import the raw crystal-violet CSVs. Lines starting with '#' inside the
# CSVs are platemap annotations, hence comment.char = '#'.
raw_data1 <- read.csv('Data/crystal_violet_biofilm/160524_cv_merge.csv', comment.char = '#')
raw_data2 <- read.csv('Data/crystal_violet_biofilm/160316_cv_mutants_full1.csv', comment.char = '#')
# Some files lack a date column; add it. Assigning the scalar (recycled over
# all rows) instead of rep(date, 240)/rep(date, 528)/... avoids hard-coding
# the row counts, which would break if any CSV changed length.
raw_data2[, "date"] <- 160316
raw_data3 <- read.csv('Data/crystal_violet_biofilm/160324_cv_mutants_full2.csv', comment.char = '#')
raw_data3[, "date"] <- 160324
raw_data3 <- filter(raw_data3, plate != 'p8', plate != 'p7', plate != 'p6', plate != 'p5') # those plates are diluted repeats
raw_data4 <- read.csv('Data/crystal_violet_biofilm/160326_cv_mutants_full3.csv', comment.char = '#')
raw_data4[, "date"] <- 160326
raw_data5 <- read.csv('Data/crystal_violet_biofilm/160505_cv_o2.csv', comment.char = '#')
raw_data5[, "date"] <- 160505
raw_data6 <- read.csv('Data/crystal_violet_biofilm/160414_cv_mutants_method.csv', comment.char = '#')
raw_data6[, "date"] <- 160414
raw_data7 <- read.csv('Data/crystal_violet_biofilm/160416_cv_mutants_method2.csv', comment.char = '#')
raw_data7[, "date"] <- 160416
The raw data are melted and reshaped, standardizing the column names and entries (somewhat) to make them easier to work with.
# Pool all experiments: melt each raw data frame to long form on the shared
# id columns, stack them, then recast so every annotation variable becomes a
# column (annotations missing from an experiment are filled "not_reported").
# Each data frame is melted directly ([[i]] extraction) and the results are
# combined in one step -- the original indexed with single brackets
# (data_list[1]), melting one-element *lists*, and grew melt_pool with
# rbind() inside a loop.
data_list <- list(raw_data1, raw_data2, raw_data3, raw_data4, raw_data5, raw_data6, raw_data7)
id_cols <- c('sample_strain', 'sample_id', 'location', 'date')
melt_pool <- do.call(rbind, lapply(data_list, melt, id.vars = id_cols)) # merge molten data
cast_data <- dcast(melt_pool, location + sample_strain + sample_id + date ~ variable, fill = "not_reported") # recast
cast_data <- data.table(cast_data) # first data.table usage!
# Standardize the annotation defaults that were implicit in some experiments.
cast_data[sample_strain == "", sample_strain := "LAC"] # blank sample_strain means "LAC"
cast_data[media == "not_reported", media := "TSB"] # absent media is TSB
cast_data[coating == "not_reported" | coating == "", coating := "none"] # absent coating is none
cast_data[O2 == "not_reported", O2 := "aerobic"] # absent O2 is aerobic
# Flag rows for removal based on their lack of usable data (found when
# looking at the raw_platemap). EDIT after investigation: remove all of 160326.
cast_data[date == "160326", drop := 'true']
cast_data[plate == "p3" & date == "160324", drop := 'true']
cast_data[sample_strain == "LAC" & sample_id == "empty", sample_strain := "empty"] # fix up this improper labelling
cast_data[drop == "not_reported", drop := "F"] # unflagged rows kept; "F" becomes FALSE via as.logical() later
str(cast_data)
## Classes 'data.table' and 'data.frame': 1662 obs. of 13 variables:
## $ location : Factor w/ 540 levels "p1b1","p1b10",..: 1 1 1 1 1 1 1 2 2 2 ...
## $ sample_strain: Factor w/ 6 levels "22251","empty",..: 2 2 2 2 3 3 3 3 3 3 ...
## $ sample_id : Factor w/ 40 levels "","agrA_C123F_20",..: 1 1 1 7 12 12 12 3 4 5 ...
## $ date : num 160505 160508 160511 160414 160324 ...
## $ plate : chr "p1" "p1" "p1" "p1" ...
## $ well : chr "b1" "b1" "b1" "b1" ...
## $ media : chr "M9" "TSB" "TSB" "0.5_glu_3_nacl" ...
## $ drop : chr "F" "F" "F" "F" ...
## $ dilution : chr "not_reported" "0.1" "0.1" "1" ...
## $ O2 : chr "anaerobic" "aerobic" "anaerobic" "aerobic" ...
## $ OD : chr "0.166" "0.1254000068" "0.1400000006" "0.139200002" ...
## $ coating : chr "none" "none" "none" "none" ...
## $ fixation : chr "not_reported" "not_reported" "not_reported" "60C" ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "index")= int
The data are now flagged and the types are changed to work better with downstream exploration and visualization.
# Convert column types and derive positional / genetic columns for
# downstream exploration and visualization.
working_data <- cast_data %>%
  transform(date = as.factor(date),
            plate = as.factor(plate),
            coating = as.factor(coating),
            OD = as.numeric(OD)) %>% # date/plate/coating to factors, OD to numeric
  filter(sample_strain == 'LAC' | sample_strain == "empty",
         media == "TSB" | media == "0.5_glu",
         O2 == "aerobic") %>% # keep only data with comparative rafts; exclude media comparisons
  # Split the well id (e.g. "b10") into its row letter and column number;
  # wells that do not match the pattern are flagged "error". (The original
  # compared grepl() == TRUE, which is redundant -- grepl() is already
  # logical.)
  mutate(row = as.factor(ifelse(grepl("[a-g][0-9]{1,2}", as.character(well)),
                                sub("([a-g])[0-9]{1,2}", "\\1", as.character(well)),
                                "error")),
         col = as.factor(ifelse(grepl("[a-g][0-9]{1,2}", as.character(well)),
                                sub("[a-g]([0-9]{1,2})", "\\1", as.character(well)),
                                "error")),
         drop = as.logical(drop)) %>% # 'true' -> TRUE, "F" -> FALSE
  separate(sample_id, c("genetic_background", "exogenous_genetic"), sep = "_[KOermCF123]{0,5}_?",
           extra = "merge", remove = FALSE) # separate the sample_id into components
str(working_data)
## Classes 'data.table' and 'data.frame': 888 obs. of 17 variables:
## $ location : Factor w/ 540 levels "p1b1","p1b10",..: 1 1 1 1 2 2 2 2 2 3 ...
## $ sample_strain : Factor w/ 6 levels "22251","empty",..: 2 3 3 3 3 3 3 3 3 2 ...
## $ sample_id : Factor w/ 40 levels "","agrA_C123F_20",..: 1 12 12 12 4 8 23 23 38 1 ...
## $ genetic_background: chr "" "wt" "wt" "wt" ...
## $ exogenous_genetic : chr NA NA NA NA ...
## $ date : Factor w/ 8 levels "160316","160324",..: 7 2 3 5 5 7 2 3 4 1 ...
## $ plate : Factor w/ 4 levels "p1","p2","p3",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ well : chr "b1" "b1" "b1" "b1" ...
## $ media : chr "TSB" "TSB" "TSB" "0.5_glu" ...
## $ drop : logi FALSE FALSE TRUE FALSE FALSE TRUE ...
## $ dilution : chr "0.1" "0.1" "1" "1" ...
## $ O2 : chr "aerobic" "aerobic" "aerobic" "aerobic" ...
## $ OD : num 0.125 0.372 0.536 2.898 2.907 ...
## $ coating : Factor w/ 3 levels "0.01_FBS","FBS",..: 3 1 1 3 3 3 1 1 3 1 ...
## $ fixation : chr "not_reported" "60C" "60C" "60C" ...
## $ row : Factor w/ 6 levels "b","c","d","e",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ col : Factor w/ 12 levels "1","10","11",..: 1 1 1 1 2 2 2 2 2 3 ...
From the output of the separate() function it is clear that the categories need some reworking: queries are made and the categories are simplified/merged.
# Clean up the exogenous_genetic column produced by separate(): inspect each
# odd value with unique(), then merge it into a standardized "pos1_*" label.
# setkey() indexes the table on exogenous_genetic, so character subsets such
# as working_data["20"] select rows by that key.
setkey(working_data, "exogenous_genetic")
unique(working_data[,.(exogenous_genetic)]) # we need to clean up this category
unique(working_data[NA_character_,.(exogenous_genetic, genetic_background, sample_strain, sample_id)]) #what are the entries which have NA?
unique(working_data[exogenous_genetic == "",.(exogenous_genetic, genetic_background, sample_strain, sample_id)])
unique(working_data["20",.(exogenous_genetic, genetic_background, sample_strain, sample_id)]) # 1st working through the list
working_data["20", exogenous_genetic := "pos1_agrA_20"] # relabel "20" with the full construct name
unique(working_data[exogenous_genetic == "pos1_empty",.(exogenous_genetic, genetic_background, sample_strain, sample_id)])
unique(working_data[exogenous_genetic == "empty",.(exogenous_genetic, genetic_background, sample_strain, sample_id)])
working_data[exogenous_genetic == "empty", exogenous_genetic := "pos1_empty"] # standardize on the pos1_ prefix
unique(working_data[exogenous_genetic == "37" | exogenous_genetic == "77",.(exogenous_genetic, genetic_background, sample_strain, sample_id)])
working_data[exogenous_genetic == "37" | exogenous_genetic == "77", exogenous_genetic := ""] # bare numeric suffixes carry no exogenous construct
unique(working_data[exogenous_genetic == "37_empty" | exogenous_genetic == "77_empty",.(exogenous_genetic, genetic_background, sample_strain, sample_id)])
working_data[exogenous_genetic == "37_empty" | exogenous_genetic == "77_empty", exogenous_genetic := "pos1_empty"]
unique(working_data[exogenous_genetic == "37_comp",.(exogenous_genetic, genetic_background, sample_strain, sample_id)])
working_data[exogenous_genetic == "37_comp", exogenous_genetic := "pos1_atl"]
unique(working_data[exogenous_genetic == "comp20" | exogenous_genetic == "77_pos1_20",.(exogenous_genetic, genetic_background, sample_strain, sample_id)])
working_data[exogenous_genetic == "comp20" | exogenous_genetic == "77_pos1_20", exogenous_genetic := "pos1_agrA_20"]
unique(working_data[exogenous_genetic == "comp",.(exogenous_genetic, genetic_background, sample_strain, sample_id)])
# A bare "comp" means the complement matching the genetic background, so it
# is resolved per-background in a single chained update.
working_data[exogenous_genetic == "comp" & genetic_background == "atl", exogenous_genetic := "pos1_atl"
][exogenous_genetic == "comp" & genetic_background == "ica", exogenous_genetic := "pos1_ica"
][exogenous_genetic == "comp" & genetic_background == "srtA", exogenous_genetic := "pos1_srtA"]
unique(working_data[,.(genetic_background)]) # check the other separated column
unique(working_data[genetic_background == "",.(exogenous_genetic, genetic_background, sample_strain, sample_id)])
# Combine the cleaned columns into a single simplified grouping factor.
working_data[, simple_id := as.factor(paste(genetic_background, exogenous_genetic, sep = "_"))]
Some final cleaning.
# Flag column 6 of the 160324 plates for removal, then drop every row that
# has been flagged anywhere above (drop == TRUE).
working_data[date == "160324" & col == "6", drop := TRUE]
working_data <- filter(working_data, drop == FALSE)
The raw ODs plotted in the style of the Tecan output.
# Platemap of the raw ODs: OD as the fill colour, sample_id as the well
# label (text coloured by strain), faceted plate (rows) x date (columns).
raw_platemap <- ggplot(working_data, aes(col, row, label = sample_id)) +
geom_raster(aes(fill=OD)) +
geom_text(aes(colour = sample_strain),fontface = "bold", size = 3, angle = -45) +
facet_grid(plate~date)
raw_platemap
Because the “biofilms” are sensitive to washing, I am normalizing them to the “empty” (background) well and the “wt” (proportionality constant) well in each row; if there is more than one wt sample of the same strain, the “wt” wells are averaged per row.
# Normalize each well within its (date, plate, row) group:
#   1. OD_backgroundsub = OD minus the mean OD of the "empty" wells in the row;
#   2. OD_adjusted = OD_backgroundsub divided by the mean background-subtracted
#      OD of the "wt" wells in the row (averaged when a row has >1 wt well).
# To test the normalization: x <- filter(working_data, sample_id == "wt")
norm_data <- working_data[sample_strain == "empty", empty_well_values := OD
  ][, OD_backgroundsub := OD - mean(empty_well_values, na.rm = TRUE), keyby = .(date, plate, row)
  ][sample_id == "wt", wt_well_values := OD_backgroundsub
  ][, OD_adjusted := OD_backgroundsub / mean(wt_well_values, na.rm = TRUE), keyby = .(date, plate, row)
  ][, id_merge := as.factor(paste(date, genetic_background, "KO", exogenous_genetic, sep = '_'))] # and make a merged id
# Drop anomalously low (negative) adjusted ODs. Written as `OD_adjusted >= 0`
# instead of the original `!OD_adjusted < 0`, which only worked because `!`
# binds more loosely than `<` in R; NA rows are dropped by both forms.
norm_data <- norm_data[OD_adjusted >= 0, ]
max_OD <- max(norm_data$OD_adjusted)
min_OD <- min(norm_data$OD_adjusted)
norm_data <- norm_data %>%
  mutate(OD_log = ifelse(OD_adjusted > 0, log10(OD_adjusted), 0),
         OD_scale = (OD_adjusted - min_OD) / (max_OD - min_OD)) %>% # log base 10 and scale to 0-1
  filter(sample_strain != "empty", drop == FALSE)
# NOTE(review): the original repeated the negative-OD filter here; it was a
# no-op since negatives were already removed above and OD_adjusted is not
# modified afterwards, so the duplicate has been dropped.
# Platemap of the background-subtracted, wt-normalized ODs, faceted by
# plate (rows) and experiment date (columns).
norm_platemap <- ggplot(norm_data, aes(x = col, y = row, label = sample_id)) +
  geom_raster(aes(fill = OD_adjusted)) +
  geom_text(size = 3, fontface = "bold", angle = -45) +
  facet_grid(plate ~ date)
norm_platemap
Since I am going to exclude the anaerobic data in the later part of this analysis, I will look at it here.
# Adjusted OD per detailed grouping (sample_id): boxplots with every point
# overplotted (jittered), coloured by date and shaped by plate. Boxplot
# outliers are hidden since each observation is drawn as a point anyway.
summary1plot <- ggplot(norm_data, aes(sample_id, OD_adjusted)) +
scale_colour_brewer(palette = "YlOrRd") +
geom_boxplot(outlier.shape=NA) +
geom_point(aes(shape = plate, colour = date), position = "jitter") +
theme(axis.text.x=element_text(angle = -90, hjust = 0)) +
scale_y_log10()
summary1plot #adjusted OD by detailed groupings
# Raw (unadjusted) OD per simplified grouping, with point shape encoding the
# plate coating -- shows the variability that normalization is meant to remove.
summary2plot <- ggplot(norm_data, aes(simple_id, OD)) +
scale_shape_manual(values = c(1,16,7)) +
scale_colour_brewer(palette = "YlOrRd") +
geom_boxplot(outlier.shape=NA) +
geom_point(aes(shape = coating, colour = date), size = 3, position = "jitter") +
theme(axis.text.x=element_text(angle = -90, hjust = 0)) +
scale_y_log10()
summary2plot #OD by simple groupings
Since there is some variability over time in the absolute OD values (also because I am including FBS- and non-FBS-treated wells), I will use the normalized values for all analyses.
# Adjusted OD per simplified grouping: same layout as summary2plot but using
# the normalized values (OD_adjusted) on a log10 y axis.
summary3plot <- ggplot(norm_data, aes(simple_id, OD_adjusted)) +
scale_shape_manual(values = c(1,16,7)) +
scale_colour_brewer(palette = "YlOrRd") +
geom_boxplot(outlier.shape=NA) +
geom_point(aes(shape = coating, colour = date), size = 3, position = "jitter") +
theme(axis.text.x=element_text(angle = -90, hjust = 0)) +
scale_y_log10()
summary3plot #adjusted OD by simple groupings
some deviation from 1 for wt is OK considering that there are sometimes two wt observations and drift between them…
but excessive deviation warrants some investigation. It seems like the 160326 and 160324 experiments are driving this, therefore the investigation will begin there
after this analysis was borne out, I made the changes by adding a drop column and removing the offending data from the larger set
lots of variation in the empty wells
of the plates in 160326 plate 1 has the highest disagreement between wt, then 2, 1
unique(norm_data[simple_id == "wt_NA" & date == "160326", .(date, plate, row, OD_backgroundsub, OD, wt_well_values, OD_adjusted)])
## Empty data.table (0 rows) of 7 cols: date,plate,row,OD_backgroundsub,OD,wt_well_values...
of the plates in 160324 plate 4 has the highest disagreement between wt, then 2, 1 also edge wells are responsible
unique(norm_data[simple_id == "wt_NA" & date == "160324", .(date, plate, row, OD_backgroundsub, OD, wt_well_values, OD_adjusted)])
## date plate row OD_backgroundsub OD wt_well_values OD_adjusted
## 1: 160324 p1 b 0.2751 0.3720 0.2751 1
## 2: 160324 p1 c 0.2646 0.3645 0.2646 1
## 3: 160324 p1 d 0.3340 0.4259 0.3340 1
## 4: 160324 p1 e 0.4417 0.5432 0.4417 1
## 5: 160324 p1 f 0.3804 0.4757 0.3804 1
## 6: 160324 p1 g 0.3280 0.4279 0.3280 1
## 7: 160324 p2 b 0.2492 0.3375 0.2492 1
## 8: 160324 p2 c 0.2403 0.3305 0.2403 1
## 9: 160324 p2 d 0.2929 0.3901 0.2929 1
## 10: 160324 p2 e 0.3177 0.4384 0.3177 1
## 11: 160324 p2 f 0.3694 0.4643 0.3694 1
## 12: 160324 p2 g 0.3296 0.4442 0.3296 1
## 13: 160324 p4 b 0.2372 0.3285 0.2372 1
## 14: 160324 p4 c 0.1344 0.2238 0.1344 1
## 15: 160324 p4 d 0.2084 0.3043 0.2084 1
## 16: 160324 p4 e 0.1832 0.2718 0.1832 1
## 17: 160324 p4 f 0.2260 0.3345 0.2260 1
## 18: 160324 p4 g 0.1805 0.2936 0.1805 1
In the end I think we will exclude the experiments of 160326 and pre-average the wt values from 160324.
# Per-group summary statistics of the normalized OD: mean, standard
# deviation, and a t-based 95% confidence interval.
# NOTE(review): t.test() assumes approximate normality; the Shapiro-Wilk
# tests later in the script reject normality for several groups, so these
# CIs should be read with caution (likely the point of the original
# "CANT DO IT CAUSE THE DATA?" remark).
data_summary <- norm_data %>%
group_by(simple_id) %>%
summarise(
mean = mean(OD_adjusted, na.rm = TRUE), # means comparison
sdev = sd(OD_adjusted, na.rm = TRUE),
ci_lower = t.test(OD_adjusted)$conf.int[1], #95% confidence intervals CANT DO IT CAUSE THE DATA?
ci_upper = t.test(OD_adjusted)$conf.int[2])
data_summary
## Source: local data table [15 x 5]
##
## simple_id mean sdev ci_lower ci_upper
## (fctr) (dbl) (dbl) (dbl) (dbl)
## 1 wt_NA 1.0000000 0.04302054 0.9912832 1.0087168
## 2 agrA_ 4.5587988 2.17442334 3.8230798 5.2945178
## 3 atl_ 0.6362879 0.45641154 0.5007504 0.7718254
## 4 icaA_ 1.2007692 0.78851209 0.8678097 1.5337286
## 5 srtA_ 0.8175688 0.52966599 0.5885240 1.0466135
## 6 atl_pos1_atl 1.5994725 0.59746574 1.4451308 1.7538143
## 7 atl_pos1_empty 0.5913342 0.34650740 0.5018219 0.6808466
## 8 icaA_pos1_empty 1.0717197 0.25459767 0.9616234 1.1818160
## 9 srtA_pos1_empty 1.7692307 0.81503527 1.4250715 2.1133899
## 10 icaA_pos1_ica 1.2929331 0.35655355 1.0663897 1.5194764
## 11 srtA_pos1_srtA 1.8293410 0.70594900 1.5312449 2.1274371
## 12 agrA_pos1_agrA_20 1.1847146 0.42873552 1.0739604 1.2954687
## 13 agrA_pos1_agrA_min 1.6518515 0.55214205 1.3010371 2.0026658
## 14 agrA_pos1_empty 1.7175058 0.53097691 1.5803399 1.8546716
## 15 ica_pos1_ica 1.1435963 0.20516336 1.0057656 1.2814269
pos <- position_dodge(width = 0.9) # for error bars to line up with dodged columns
# Bar chart of the per-group means with 95% CI error bars.
data_summary_plot <- ggplot(data_summary, aes(simple_id, mean, ymin = ci_lower, ymax = ci_upper)) +
  geom_bar(aes(fill = simple_id), stat = "identity", position = pos, width = 0.9) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  # group, not fill: fill is not a geom_errorbar aesthetic, so the original
  # aes(fill = simple_id) only triggered an "unknown aesthetics" warning.
  geom_errorbar(aes(group = simple_id), width = 0.2, position = pos)
data_summary_plot
# Collect the OD vectors per simple_id. Using `print` as the aggregation
# function returns each group's vector unchanged (printing it as a side
# effect), giving list-columns of per-group observations.
OD_aggregate <- aggregate(cbind(OD, OD_adjusted, OD_log, OD_scale) ~ simple_id,
data = norm_data,
print) #the OD for each pairwise combination, also required for Cliff's DELTA
xselect <- combn(OD_aggregate[["simple_id"]], 2) # all pairwise group combinations
names(OD_aggregate$OD_adjusted) <- OD_aggregate$simple_id # label each group's vector
# Cullen-Frey (skewness/kurtosis) exploration with 1000 bootstrap samples
# per group, drawn in a 5x3 panel grid.
par(mfcol = c(5,3), mar = c(2,2,2,1))
distribution_explore <- lapply(OD_aggregate$OD_adjusted, function(x) descdist(x, boot = 1000))
par(original_par)
# Maximum-likelihood fits of six candidate distributions for every group.
fit_norm <- lapply(OD_aggregate$OD_adjusted, function(z) fitdist(z, "norm", method = "mle"))
fit_lnorm <- lapply(OD_aggregate$OD_adjusted, function(z) fitdist(z, "lnorm", method = "mle"))
fit_exp <- lapply(OD_aggregate$OD_adjusted, function(z) fitdist(z, "exp", method = "mle"))
fit_gamma <- lapply(OD_aggregate$OD_adjusted, function(z) fitdist(z, "gamma", method = "mle"))
fit_weibull <- lapply(OD_aggregate$OD_adjusted, function(z) fitdist(z, "weibull", method = "mle"))
fit_unif <- lapply(OD_aggregate$OD_adjusted, function(z) fitdist(z, "unif", method = "mle"))
fits <- c(exp = fit_exp, gamma = fit_gamma, norm = fit_norm, lnorm = fit_lnorm, unif = fit_unif, weibull = fit_weibull) #fits using untransformed data are combined
AIC_extract <- list(names(fit_norm), c("exp", "gamma", "norm", "lnorm", "unif", "weibull")) # dimnames for the 15x6 AIC matrix
# NOTE(review): get_AIC() is not defined in this file -- presumably defined
# in another chunk; from its use here it appears to extract an AIC value per
# fit. The matrix dims (15, 6) are hard-coded to 15 groups x 6 distributions.
AIC_summary <- get_AIC(fits) %>%
matrix(15,6, dimnames = AIC_extract)
AIC_summary
## exp gamma norm lnorm unif
## agrA_ 183.22826 158.5988840 161.0763948 160.673220 NA
## agrA_pos1_agrA_20 142.34022 48.4733594 71.6343914 38.878490 NA
## agrA_pos1_agrA_min 38.04552 20.8734071 22.7555900 20.227854 NA
## agrA_pos1_empty 186.90477 87.8028307 97.2997877 84.973206 NA
## atl_ 52.40642 38.0290254 61.3701602 35.090585 NA
## atl_pos1_atl 178.36087 103.7874917 111.4571957 102.608745 NA
## atl_pos1_empty 58.95513 29.9584103 46.0820633 30.949991 NA
## ica_pos1_ica 26.95191 -0.4599682 -0.6786362 -0.261482 NA
## icaA_ 58.78219 50.3178181 59.6824567 48.848643 NA
## icaA_pos1_empty 51.18617 5.7356090 5.3175281 6.420402 NA
## icaA_pos1_ica 32.16592 11.1203637 12.2598884 10.837000 NA
## srtA_ 38.73467 31.6943156 39.0153828 30.839113 NA
## srtA_pos1_empty 77.38615 61.2344188 61.2704718 63.038530 NA
## srtA_pos1_srtA 78.98988 51.5104067 54.3734294 51.341317 NA
## wt_NA 194.00000 -328.0992512 -328.6159285 -327.619426 NA
## weibull
## agrA_ 157.954380
## agrA_pos1_agrA_20 70.291659
## agrA_pos1_agrA_min 22.619058
## agrA_pos1_empty 98.317435
## atl_ 41.583302
## atl_pos1_atl 108.631569
## atl_pos1_empty 32.501154
## ica_pos1_ica -1.044914
## icaA_ 51.984351
## icaA_pos1_empty 5.129984
## icaA_pos1_ica 12.734397
## srtA_ 32.618793
## srtA_pos1_empty 59.804556
## srtA_pos1_srtA 53.220306
## wt_NA -299.804127
# Diagnostic comparison of the candidate distribution fits (gamma, normal,
# lognormal, Weibull) for one group, selected by its index into the fit_*
# lists. Draws density, Q-Q, CDF, and P-P comparison panels in a 2x2 layout
# and restores the graphical parameters afterwards.
# Replaces five identical copy-pasted blocks in the original.
#
# index1 -- integer index into fit_gamma/fit_norm/fit_lnorm/fit_weibull
# legend -- legend text for the four fits, in that order
compare_fit_plots <- function(index1,
                              legend = c("gamma", "normal", "lognormal", "weibull")) {
  data_subset <- list(fit_gamma[[index1]], fit_norm[[index1]],
                      fit_lnorm[[index1]], fit_weibull[[index1]])
  old_par <- par(mfcol = c(2, 2), mar = c(2, 2, 2, 1))
  on.exit(par(old_par), add = TRUE) # restore par even if a plot errors
  denscomp(data_subset, legendtext = legend)
  qqcomp(data_subset, legendtext = legend)
  cdfcomp(data_subset, legendtext = legend)
  ppcomp(data_subset, legendtext = legend)
  invisible(data_subset)
}
compare_fit_plots(15) # wt
compare_fit_plots(1)  # agrA_
compare_fit_plots(2)  # agrA_pos1_agrA_20
compare_fit_plots(3)  # agrA_pos1_agrA_min
compare_fit_plots(4)  # agrA_pos1_empty
# Build a data frame pairing theoretical normal quantiles ("xq") with the
# sample values of x; the sample column is named after the expression passed
# as x (e.g. qqnorm_data(OD_adjusted) yields a column "OD_adjusted").
#
# Fixes relative to the original:
#  * plot.it = FALSE spells out the full argument name (the original's
#    `plot = FALSE` relied on partial argument matching);
#  * deparse(substitute(x)) builds the column name explicitly rather than
#    letting names<- coerce a language object to character.
qqnorm_data <- function(x){
  Q <- as.data.frame(qqnorm(x, plot.it = FALSE))
  names(Q) <- c("xq", deparse(substitute(x)))
  Q
}
# Compute theoretical-vs-sample quantiles per group for a normal Q-Q plot.
# NOTE(review): do() is the group-wise idiom of the dplyr version attached
# above; modern dplyr would use group_modify()/reframe().
theoretical_qq <- norm_data %>%
group_by(simple_id) %>%
do(with(., qqnorm_data(OD_adjusted)))
# One Q-Q panel per simple_id with a least-squares reference line; deviation
# from the line indicates departure from normality.
ggplot(data = theoretical_qq, aes(x = xq, y = OD_adjusted)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
xlab("Theoretical") +
ylab("Sample") +
facet_wrap(~simple_id)
The null hypothesis of this test: the data are normally distributed.
norm_data[, shapiro.test(.SD$OD_adjusted)$p.value, by = simple_id]
## simple_id V1
## 1: wt_NA 5.067716e-14
## 2: agrA_ 1.143929e-01
## 3: atl_ 1.638651e-05
## 4: icaA_ 3.867835e-03
## 5: srtA_ 5.899671e-03
## 6: atl_pos1_atl 3.206090e-03
## 7: atl_pos1_empty 6.296682e-04
## 8: icaA_pos1_empty 7.900115e-01
## 9: srtA_pos1_empty 1.202589e-01
## 10: icaA_pos1_ica 3.430248e-01
## 11: srtA_pos1_srtA 3.053017e-01
## 12: agrA_pos1_agrA_20 1.154757e-09
## 13: agrA_pos1_agrA_min 4.638858e-02
## 14: agrA_pos1_empty 4.033927e-04
## 15: ica_pos1_ica 2.874590e-01
The null hypothesis of these tests is that all group variances are equal.
bartlett.test(OD_adjusted ~ simple_id, data=norm_data) # Bartlett Test of Homogeneity of Variances (parametric)
##
## Bartlett test of homogeneity of variances
##
## data: OD_adjusted by simple_id
## Bartlett's K-squared = 782.06, df = 14, p-value < 2.2e-16
fligner.test(OD_adjusted ~ simple_id, data=norm_data) # Figner-Killeen Test of Homogeneity of Variances (nonparamatric)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: OD_adjusted by simple_id
## Fligner-Killeen:med chi-squared = 262.56, df = 14, p-value <
## 2.2e-16